;; Generate char-tables.iscm which is included by srfi14.scm.

(import (srfi :14 char-sets))
(import (rnrs sorting))
(import (kawa regex))

(define prog-name (regex-replace (regex "^.*/") (car (command-line)) ""))
(define out ::output-port (current-output-port))

(format out ";; This -*- scheme -*- file is generated by ~a.  DO NOT EDIT.~%"
        prog-name)

(define (print-list name cs::char-set #!optional (out (current-output-port)))
  (let* ((iarr ::int[] cs:inversion-list)
         (n ::int cs:inversion-list-size)
         (ivec (gnu.lists.S32Vector iarr)))
    ;;(format (current-error-port) "print-list ~a n:~d arr-len:~d~%~!" name n iarr:length)
    (format out
            &{(define-early-constant %&[name] ::int[]
              &|    (constant-fold int[] ~@<~{~:_~d ~}~:_~d~:>))
              &|}
              (ivec:subList 0 (- n 1))
              (ivec (- n 1)))))

(define-syntax filter-with-method
  (syntax-rules ()
    ((_ M)
     (char-set-filter
      (lambda (c) (M (char->integer c)))
      char-set:full))))

(define cs-blank
   ;; The set of blank characters is defined to be the union of U+0009
  ;; and Unicode category Zs.
  (char-set-filter
   (lambda (c)
     (or (char=? c #\u0009)
         (let ((type ::byte (java.lang.Character:get-type
                             (char->integer c))))
           (= type java.lang.Character:SPACE_SEPARATOR))))
   char-set:full))

(define cs-whitespace
   ;; The set of whitespace characters is all the characters in
   ;; Unicode categories Zs, Zl or Zp, along with points 9-13.
  (char-set-filter
   (lambda (c)
     (or (char=? c #\u0009)
         (char=? c #\u000a)
         (char=? c #\u000b)
         (char=? c #\u000c)
         (char=? c #\u000d)
         (let ((type ::byte (java.lang.Character:get-type
                             (char->integer c))))
           (or (= type java.lang.Character:SPACE_SEPARATOR)
               (= type java.lang.Character:LINE_SEPARATOR)
               (= type java.lang.Character:PARAGRAPH_SEPARATOR)))))
   char-set:full))

(print-list "title-case"
            (filter-with-method java.lang.Character:title-case?))

(print-list "whitespace" cs-whitespace)

(print-list "blank" cs-blank)

(print-list "lower-case"
            (filter-with-method java.lang.Character:lower-case?))

(print-list "upper-case"
            (filter-with-method java.lang.Character:upper-case?))

(define cs-letter (filter-with-method java.lang.Character:letter?))
(print-list "letter" cs-letter)
(define cs-digit (filter-with-method java.lang.Character:digit?))
(print-list "digit" cs-digit)

(define cs-punctuation
  (char-set-filter
   (lambda (c)
     (let ((type ::byte (java.lang.Character:get-type
                         (char->integer c))))
       (or (= type java.lang.Character:CONNECTOR_PUNCTUATION)
           (= type java.lang.Character:DASH_PUNCTUATION)
           (= type java.lang.Character:START_PUNCTUATION)
           (= type java.lang.Character:END_PUNCTUATION)
           (= type java.lang.Character:INITIAL_QUOTE_PUNCTUATION)
           (= type java.lang.Character:FINAL_QUOTE_PUNCTUATION)
           (= type java.lang.Character:OTHER_PUNCTUATION))))
   char-set:full))
(print-list "punctuation" cs-punctuation)

(define cs-symbol
  (char-set-filter
   (lambda (c)
     (let ((type ::byte (java.lang.Character:get-type
                         (char->integer c))))
       (or (= type java.lang.Character:MATH_SYMBOL)
           (= type java.lang.Character:CURRENCY_SYMBOL)
           (= type java.lang.Character:MODIFIER_SYMBOL)
           (= type java.lang.Character:OTHER_SYMBOL))))
   char-set:full))
(print-list "symbol" cs-symbol)

(define cs-letter+digit
  (char-set-union cs-letter cs-digit))
  
(print-list "letter+digit" cs-letter+digit)

(define cs-graphic
  (char-set-union cs-letter+digit cs-punctuation cs-symbol))
(print-list "graphic" cs-graphic)

(define cs-printing
   (char-set-union cs-graphic cs-whitespace))
(print-list "printing" cs-printing)


